## The functions in the rglwidget package have been moved to rgl.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: viridisLite
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
I have converted the Excel file to a CSV file so R can be able to read the data. Blank cells appear as N/A values here
#Load data
fepdata1 = read.csv("FEP_project_reloaded_28.4.2021.csv", stringsAsFactors = F)
fepdata = fepdata1[complete.cases(fepdata1[,c(6)]) | complete.cases(fepdata1[,c(7)]) | complete.cases(fepdata1[,c(8)]),]
#Make phenotype 0 and 1. 0 for Healthy, 1 for Cases.
fepdata$Phenotype = as.factor(ifelse(fepdata$Phenotype=="D",1,0))
fepdata$pheno_lm = as.factor(ifelse(fepdata$Phenotype==0,"Control","Case"))
#Add phenotype colours
selcols = c("#648FFF","#DC267F")
fepdata$colour = selcols[fepdata$Phenotype]
#Multivariate Analysis
par(mfrow=c(3,3))
hist(fepdata$pAkt.value, breaks=20, xlab="pAkt", main="pAkt")
hist(fepdata$pGSK3.value, breaks=20, xlab="pGSK3", main="pGSK3")
hist(fepdata$pS6.value, breaks=20, xlab="pS6", main="pS6")
plot(density(fepdata$pAkt.value, na.rm = T), main="")
plot(density(fepdata$pGSK3.value, na.rm = T), main="")
plot(density(fepdata$pS6.value, na.rm = T), main="")
qqnorm(fepdata$pAkt.value, pch = 1, frame = FALSE)
qqline(fepdata$pAkt.value, col = "steelblue", lwd = 2)
qqnorm(fepdata$pGSK3.value, pch = 1, frame = FALSE)
qqline(fepdata$pGSK3.value, col = "steelblue", lwd = 2)
qqnorm(fepdata$pS6.value, pch = 1, frame = FALSE)
qqline(fepdata$pS6.value, col = "steelblue", lwd = 2)
shapiro.test(fepdata$pAkt.value)
##
## Shapiro-Wilk normality test
##
## data: fepdata$pAkt.value
## W = 0.95549, p-value = 0.01239
shapiro.test(fepdata$pGSK3.value)
##
## Shapiro-Wilk normality test
##
## data: fepdata$pGSK3.value
## W = 0.93591, p-value = 0.001555
shapiro.test(fepdata$pS6.value)
##
## Shapiro-Wilk normality test
##
## data: fepdata$pS6.value
## W = 0.95438, p-value = 0.01076
All variables look left-skewed. The normality test shows that all values do not follow a normal distribution (p shapiro < 0.05).
fepdata$pAkt.value_z = as.vector(scale(fepdata$pAkt.value,center = T, scale=T))
fepdata$pS6.value_z = as.vector(scale(fepdata$pS6.value,center = T, scale=T))
fepdata$pGSK3.value_z = as.vector(scale(fepdata$pGSK3.value,center = T, scale=T))
boxplot(main="Before Z-score transformation", fepdata[,c(6:8)], las=1, col="steelblue", outline=F)
stripchart(fepdata[,c(6:8)], vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'blue')
boxplot(main="After Z-score transformation", fepdata[,c(12:14)], las=1, col="steelblue", outline=F)
stripchart(fepdata[,c(12:14)], vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'blue')
Let’s perform tests to see how different are the phosphorylation values for each kinase between cases and controls.
We are performing Mann–Whitney U test (non-parametric) as we cannot assume normality for any of the variables.
pAkt
pakt_test = wilcox.test(fepdata$pAkt.value[fepdata$Phenotype==0],fepdata$pAkt.value[fepdata$Phenotype==1])
fep_wide = fepdata[,c(1,9,6:8)] %>% pivot_longer(c(3:5), names_to = 'kinase', values_to = "phospho") %>% data.frame()
p <- ggplot(fep_wide, aes(x=kinase, y=phospho, fill=Phenotype)) + geom_boxplot(aes(fill=Phenotype),outlier.shape = NA)
p <- p + geom_point(position=position_jitterdodge(),alpha=0.3)
p <- p + facet_wrap( ~ kinase, scales="free")
p <- p + xlab("Kinase") + ylab("Phosphorylation")
p <- p + stat_compare_means(aes(group = Phenotype), label.x = 1, label.y = -0.1, size=3)
p <- p + guides(fill=guide_legend(title="Phenotype"))
p
pakt_test = wilcox.test(fepdata$pAkt.value[fepdata$Phenotype==0],fepdata$pAkt.value[fepdata$Phenotype==1])
pgsk3_test = wilcox.test(fepdata$pGSK3.value[fepdata$Phenotype==0],fepdata$pGSK3.value[fepdata$Phenotype==1])
ps6_test = wilcox.test(fepdata$pS6.value[fepdata$Phenotype==0],fepdata$pS6.value[fepdata$Phenotype==1])
pAkt Cases vs Controls
print(pakt_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pAkt.value[fepdata$Phenotype == 0] and fepdata$pAkt.value[fepdata$Phenotype == 1]
## W = 790, p-value = 0.1114
## alternative hypothesis: true location shift is not equal to 0
pGSK3 Cases vs Controls
print(pgsk3_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pGSK3.value[fepdata$Phenotype == 0] and fepdata$pGSK3.value[fepdata$Phenotype == 1]
## W = 365, p-value = 0.005368
## alternative hypothesis: true location shift is not equal to 0
pS6 Cases vs Control
print(ps6_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pS6.value[fepdata$Phenotype == 0] and fepdata$pS6.value[fepdata$Phenotype == 1]
## W = 969, p-value = 0.0002204
## alternative hypothesis: true location shift is not equal to 0
It appears the pS6 shows the greatest difference between cases and controls , compared to the other kinases. pGSK3 also shows a great statistically significant difference, while pAtk show a mild but significant difference.
Let’s perform tests to see how different are the phosphorylation values for each kinase between cases and controls using Z-score transformed value.
We are performing Mann–Whitney U test (non-parametric) as we cannot assume normality for any of the variables.
pAkt
pakt_test = wilcox.test(fepdata$pAkt.value_z[fepdata$Phenotype==0],fepdata$pAkt.value_z[fepdata$Phenotype==1])
fep_wide = fepdata[,c(1,9,12:14)] %>% pivot_longer(c(3:5), names_to = 'kinase', values_to = "phospho")
p <- ggplot(fep_wide, aes(x=kinase, y=phospho, fill=Phenotype)) + geom_boxplot(aes(fill=Phenotype),outlier.shape = NA)
p <- p + geom_point(position=position_jitterdodge(),alpha=0.3)
p <- p + facet_wrap( ~ kinase, scales="free")
p <- p + xlab("Kinase") + ylab("Phosphorylation")
p <- p + stat_compare_means(aes(group = Phenotype), label.x = 1, label.y = -2, size=3)
p <- p + guides(fill=guide_legend(title="Phenotype"))
p
pakt_test = wilcox.test(fepdata$pAkt.value_z[fepdata$Phenotype==0],fepdata$pAkt.value_z[fepdata$Phenotype==1])
pgsk3_test = wilcox.test(fepdata$pGSK3.value_z[fepdata$Phenotype==0],fepdata$pGSK3.value_z[fepdata$Phenotype==1])
ps6_test = wilcox.test(fepdata$pS6.value_z [fepdata$Phenotype==0],fepdata$pS6.value_z[fepdata$Phenotype==1])
pAkt Cases vs Controls
print(pakt_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pAkt.value_z[fepdata$Phenotype == 0] and fepdata$pAkt.value_z[fepdata$Phenotype == 1]
## W = 790, p-value = 0.1114
## alternative hypothesis: true location shift is not equal to 0
pGSK3 Cases vs Controls
print(pgsk3_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pGSK3.value_z[fepdata$Phenotype == 0] and fepdata$pGSK3.value_z[fepdata$Phenotype == 1]
## W = 365, p-value = 0.005368
## alternative hypothesis: true location shift is not equal to 0
pS6 Cases vs Control
print(ps6_test)
##
## Wilcoxon rank sum exact test
##
## data: fepdata$pS6.value_z[fepdata$Phenotype == 0] and fepdata$pS6.value_z[fepdata$Phenotype == 1]
## W = 969, p-value = 0.0002204
## alternative hypothesis: true location shift is not equal to 0
It appears the pS6 shows the greatest difference between cases and controls , compared to the other kinases. pGSK3 also shows a great statistically significant difference, while pAkt doesn’t seem to change.
Because the Phenotype is a catergorical and binary variable we’ll try to fit a binomial/logistic regression.
We’ll fit two models: 1) Phenotype ~ pS6 + pGSK3 + pAkt. 2) Phenotype ~ pS6 + pGSK3 + pAkt + all interactions between them.
Fit the models:
model_1 = glm(fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z, family="binomial")
model_2 = glm(fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z, family="binomial")
probabilities <- predict(model_1, type = "response")
all_models = list(model_1,model_2)
We need to check whether our data follows the logistic regression assumptions, to conclude if our model fitting is valid.
The logistic regression method assumes that:
That’s true. THe phenotype is a binary variable; Cases vs Controls.
# Select only numeric predictors
mydata <- fepdata[12:14] %>%
dplyr::select_if(is.numeric)
predictors <- colnames(mydata)
# Bind the logit and tidying the data for plot
mydata <- mydata %>% na.omit() %>%
mutate(logit = log(probabilities/(1-probabilities))) %>%
gather(key = "predictors", value = "predictor.value", -logit)
ggplot(mydata, aes(logit, predictor.value))+
geom_point(size = 0.5, alpha = 0.5) +
geom_smooth(method = "loess") +
theme_bw() +
facet_wrap(~predictors, scales = "free_y")
The smoothed scatter plots show that variables pAkt, pGSK3 and PS6 are all quite linearly associated with the Phenotype in logit scale. So this assumption is being fullfilled!
nameit <- function(v1) {
deparse(substitute(v1))
}
check_outliers <- function(model){
mdata <- augment(model) %>%
mutate(index = 1:n())
cat("Examined model standardised residuals: ")
cat(paste0(model$call,"\n"))
print(summary(mdata$`.std.resid`))
cat("\n\n")
}
lapply(all_models, function(x) check_outliers(x))
## Examined model standardised residuals: glm
## fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z
## binomial
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.888853 -0.730267 -0.133423 0.008236 0.739811 2.265495
##
##
## Examined model standardised residuals: glm
## fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z
## binomial
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.19971 -0.75261 -0.04279 -0.00820 0.63948 2.32111
## [[1]]
## NULL
##
## [[2]]
## NULL
There are no standardised residuals > 3 so our data doesn’t have influential data points!
call_vif <- function(model){
cat("Calling VIF for model:")
cat(paste0(model$call,"\n"))
print(vif(model))
cat("\n\n")
}
lapply(all_models, function(x) call_vif(x))
## Calling VIF for model:glm
## fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z
## binomial
## fepdata$pS6.value_z fepdata$pGSK3.value_z fepdata$pAkt.value_z
## 1.174901 1.365275 1.176885
##
##
## Calling VIF for model:glm
## fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z
## binomial
## fepdata$pS6.value_z
## 1.472502
## fepdata$pGSK3.value_z
## 1.769428
## fepdata$pAkt.value_z
## 1.698074
## fepdata$pS6.value_z:fepdata$pGSK3.value_z
## 1.936608
## fepdata$pS6.value_z:fepdata$pAkt.value_z
## 2.114839
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z
## 1.743860
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z
## 1.888453
## [[1]]
## NULL
##
## [[2]]
## NULL
As a rule of thumb, a VIF value that exceeds 5 or 10 indicates a problematic amount of collinearity. In our example, there is no collinearity: all variables have a value of VIF well below 5.
So now we made sure all our models follows all the assumptions of the logistic regression!
Model Coefficients
get_model_sum <- function(model){
cat("----------------------------------------------------------")
print(summary(model))
cat("\n")
print(anova(model, test="Chisq"))
cat("----------------------------------------------------------")
cat("\n\n\n\n")
}
lapply(all_models, function(x) get_model_sum(x))
## ----------------------------------------------------------
## Call:
## glm(formula = fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z +
## fepdata$pAkt.value_z, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7652 -0.6984 -0.1321 0.7114 2.2227
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1938 0.3214 -0.603 0.546414
## fepdata$pS6.value_z 1.2154 0.3744 3.247 0.001168 **
## fepdata$pGSK3.value_z -1.5442 0.4593 -3.362 0.000774 ***
## fepdata$pAkt.value_z 0.7302 0.3522 2.074 0.038122 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.640 on 68 degrees of freedom
## Residual deviance: 62.456 on 65 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 70.456
##
## Number of Fisher Scoring iterations: 5
##
##
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: fepdata$pheno_lm
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 68 95.640
## fepdata$pS6.value_z 1 12.4445 67 83.195 0.0004192 ***
## fepdata$pGSK3.value_z 1 15.6045 66 67.591 7.807e-05 ***
## fepdata$pAkt.value_z 1 5.1345 65 62.456 0.0234552 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ----------------------------------------------------------
##
##
##
## ----------------------------------------------------------
## Call:
## glm(formula = fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z *
## fepdata$pAkt.value_z, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.13245 -0.64440 -0.04255 0.60260 2.23839
##
## Coefficients:
## Estimate
## (Intercept) -0.02023
## fepdata$pS6.value_z 1.54788
## fepdata$pGSK3.value_z -1.81311
## fepdata$pAkt.value_z 1.19501
## fepdata$pS6.value_z:fepdata$pGSK3.value_z -0.86815
## fepdata$pS6.value_z:fepdata$pAkt.value_z 0.74249
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.10387
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z -0.32824
## Std. Error
## (Intercept) 0.39790
## fepdata$pS6.value_z 0.48242
## fepdata$pGSK3.value_z 0.57922
## fepdata$pAkt.value_z 0.46456
## fepdata$pS6.value_z:fepdata$pGSK3.value_z 0.61977
## fepdata$pS6.value_z:fepdata$pAkt.value_z 0.58163
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.51326
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.50106
## z value Pr(>|z|)
## (Intercept) -0.051 0.95946
## fepdata$pS6.value_z 3.209 0.00133
## fepdata$pGSK3.value_z -3.130 0.00175
## fepdata$pAkt.value_z 2.572 0.01010
## fepdata$pS6.value_z:fepdata$pGSK3.value_z -1.401 0.16129
## fepdata$pS6.value_z:fepdata$pAkt.value_z 1.277 0.20175
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.202 0.83963
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z -0.655 0.51240
##
## (Intercept)
## fepdata$pS6.value_z **
## fepdata$pGSK3.value_z **
## fepdata$pAkt.value_z *
## fepdata$pS6.value_z:fepdata$pGSK3.value_z
## fepdata$pS6.value_z:fepdata$pAkt.value_z
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.640 on 68 degrees of freedom
## Residual deviance: 56.194 on 61 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 72.194
##
## Number of Fisher Scoring iterations: 6
##
##
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: fepdata$pheno_lm
##
## Terms added sequentially (first to last)
##
##
## Df Deviance
## NULL
## fepdata$pS6.value_z 1 12.4445
## fepdata$pGSK3.value_z 1 15.6045
## fepdata$pAkt.value_z 1 5.1345
## fepdata$pS6.value_z:fepdata$pGSK3.value_z 1 2.3553
## fepdata$pS6.value_z:fepdata$pAkt.value_z 1 3.1912
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 1 0.2161
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 1 0.5002
## Resid. Df
## NULL 68
## fepdata$pS6.value_z 67
## fepdata$pGSK3.value_z 66
## fepdata$pAkt.value_z 65
## fepdata$pS6.value_z:fepdata$pGSK3.value_z 64
## fepdata$pS6.value_z:fepdata$pAkt.value_z 63
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 62
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 61
## Resid. Dev
## NULL 95.640
## fepdata$pS6.value_z 83.195
## fepdata$pGSK3.value_z 67.591
## fepdata$pAkt.value_z 62.456
## fepdata$pS6.value_z:fepdata$pGSK3.value_z 60.101
## fepdata$pS6.value_z:fepdata$pAkt.value_z 56.910
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 56.694
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 56.194
## Pr(>Chi)
## NULL
## fepdata$pS6.value_z 0.0004192 ***
## fepdata$pGSK3.value_z 7.807e-05 ***
## fepdata$pAkt.value_z 0.0234552 *
## fepdata$pS6.value_z:fepdata$pGSK3.value_z 0.1248544
## fepdata$pS6.value_z:fepdata$pAkt.value_z 0.0740347 .
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.6420501
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.4793909
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ----------------------------------------------------------
## [[1]]
## NULL
##
## [[2]]
## NULL
Model output explanation:
Coefficients:
Goodness of fit: The Residual deviance is a measure of the lack of fit of your model taken as a whole, whereas the Null deviance is such a measure for a reduced model that only includes the intercept. Notice that the degrees of freedom associated with these two differs by only the number of variables we included in the model. model_1 has only two covariates (two additional degrees of freedom), while model_2 has four (four additional degrees of freedom).
The AIC is another measure of goodness of fit that takes into account the ability of the model to fit the data. This is very useful when comparing two models.
Summary
This is a scatterplot matrix between pAkt, pS6, pGSK3 and phenotype values.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
r = abs(cortest$estimate)
p = cortest$p.value
txt1 <- format(c(r, 0.123456789), digits = digits)[1]
txt2 <- format(c(p, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
text(0.5, 0.5, txt, cex = 0.8, font = 4)
}
reg <- function(x, y, col) abline(lm(y~x), col=col)
panel.lm = function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...) {
points(x, y, pch = pch, col = col, bg = bg, cex = 0.7)
ok <- is.finite(x) & is.finite(y)
if (any(ok)) reg(x[ok], y[ok], col.smooth)
}
#png("FEPvsControls_v5_pairs_all.png", units="in", pointsize = 15, width=7, height=5, res=1800)
pairs(fepdata[c(12:14,10)], panel = panel.lm,
cex = 50, pch = 19, col = fepdata$colour, cex.labels = 1,
font.labels = 2, lower.panel = panel.cor)
#dev.off()
scatterplotMatrix(x=fepdata[c(6:8,10)], groups = fepdata$Phenotype, col = selcols, legend = F)
In this matrix scatterplot, the diagonal cells show histograms of each of the variables, in this case the values of pAkt, pS6, pGSK3 and the phenotype (0 for controls, 1 for cases) values.
Each of the off-diagonal cells in the upper part of the table is a scatterplot of two of the 4 variables with a regression line fitted, for example, A4 is a scatterplot of pAkt(y) against Phenotype (0 OR 1).
Each of the off-diagonal cells in the lower part of the table contains the Spearman Correlation value of two of the 4 variables. for example, C1 shows the Pearson correlation between PAkt and PGSK3.
fepdata_temp = (subset(fepdata,Phenotype==0))
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
r = abs(cortest$estimate)
p = cortest$p.value
txt1 <- format(c(r, 0.123456789), digits = digits)[1]
txt2 <- format(c(p, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
text(0.5, 0.5, txt, cex = 1, font = 4)
}
reg <- function(x, y, col) abline(lm(y~x), col=col)
panel.lm = function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...) {
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok)) reg(x[ok], y[ok], col.smooth)
}
pairs(fepdata_temp[c(6:8)], panel = panel.lm,
cex = 1.5, pch = 19, col = fepdata_temp$colour, cex.labels = 2,
font.labels = 2, lower.panel = panel.cor)
scatterplotMatrix(x=fepdata_temp[c(12:14)], col = selcols[1])
Same description as above.
fepdata_temp = (subset(fepdata,Phenotype==1))
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
r = abs(cortest$estimate)
p = cortest$p.value
txt1 <- format(c(r, 0.123456789), digits = digits)[1]
txt2 <- format(c(p, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
text(0.5, 0.5, txt, cex = 1, font = 4)
}
reg <- function(x, y, col) abline(lm(y~x), col=col)
panel.lm = function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...) {
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok)) reg(x[ok], y[ok], col.smooth)
}
pairs(fepdata_temp[c(12:14)], panel = panel.lm,
cex = 1.5, pch = 19, col = fepdata_temp$colour, cex.labels = 2,
font.labels = 2, lower.panel = panel.cor)
scatterplotMatrix(x=fepdata_temp[c(12:14)], col = selcols[2])
Same description as above.
Adding all variables
fepdata_temp = (subset(fepdata,Phenotype==1))
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
r = abs(cortest$estimate)
p = cortest$p.value
txt1 <- format(c(r, 0.123456789), digits = digits)[1]
txt2 <- format(c(p, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
text(0.5, 0.5, txt, cex = 1.2, font = 4)
}
reg <- function(x, y, col) abline(lm(y~x), col=col)
panel.lm = function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...) {
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok)) reg(x[ok], y[ok], col.smooth)
}
#png("FEP_v5_pairs_all.png", units="in", width=10, height=8, res=1800)
pairs(fepdata_temp[c(2:5,12:14)], panel = panel.lm,
cex = 1.2, pch = 19, col = fepdata_temp$colour, cex.labels = 1.5,
font.labels = 1, lower.panel = panel.cor)
#dev.off()
Cases vs Controls in the same graph
Same graph - Red Cases - Blue Controls - GSK3 same color range
fepdata_cont = (subset(fepdata,Phenotype==0))
fepdata_case = (subset(fepdata,Phenotype==1))
gsk_max = max(c(fepdata_cont$pGSK3.value_z,fepdata_case$pGSK3.value_z), na.rm=TRUE)
gsk_min = min(c(fepdata_cont$pGSK3.value_z,fepdata_case$pGSK3.value_z), na.rm=TRUE)
akt_max = max(c(fepdata_cont$pAkt.value_z,fepdata_case$pAkt.value_z), na.rm=TRUE)
akt_min = min(c(fepdata_cont$pAkt.value_z,fepdata_case$pAkt.value_z),na.rm=TRUE)
ps6_max = max(c(fepdata_cont$pS6.value_z,fepdata_case$pS6.value_z), na.rm=TRUE)
ps6_min = min(c(fepdata_cont$pS6.value_z,fepdata_case$pS6.value_z), na.rm=TRUE)
pp <- ggplot() +
geom_point(data=fepdata_cont, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z),shape=19,size=3) +
scale_color_gradient(low="white", high="#0000cc",limits=c(gsk_min-0.5,gsk_max)) +#
new_scale_colour() +
geom_point(data=fepdata_case, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z),shape=19,size=3) +
scale_color_gradient(low="white", high="#cc0000",limits=c(gsk_min-0.5,gsk_max)) +
xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
pp
pp3 <- ggplot() +
geom_point(data=fepdata, aes(x=pAkt.value_z, y=pS6.value_z, size=pGSK3.value_z, color=pheno_lm),shape=20) +
scale_color_manual(values = c("#DC267F","#648FFF")) +
xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
pp3
pp2 <- ggplot() +
geom_point(data=fepdata_cont, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z),shape=19,size=5) +
scale_color_gradient(low="white", high="#1a5bff",limits=c(akt_min-0.5,akt_max)) +#
new_scale_colour() +
geom_point(data=fepdata_case, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z),shape=19,size=5) +
scale_color_gradient(low="white", high="#c52070",limits=c(akt_min-0.5,akt_max)) +
xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
pp2
pp4 <- ggplot() +
geom_point(data=fepdata, aes(x=pGSK3.value_z, y=pS6.value_z, size=pAkt.value_z, color=pheno_lm),shape=20) +
scale_color_manual(values = c("#DC267F","#648FFF")) +
xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
pp4
#png("FEPvsControls_v5_all_p_gsk3size.png", units="in", width=8, height=5, res=1800)
pp3 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
axis.title=element_text(size=15)) + labs(size="pGSK3 Z-value", colour="Phenotype") + scale_size(range = c(3,9))
#dev.off()
#png("FEPvsControls_v5_all_p_aktsize.png", units="in", width=8, height=5, res=1800)
pp4 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
axis.title=element_text(size=15)) + labs(size="pAkt Z-value", colour="Phenotype") + scale_size(range = c(3,10))
#dev.off()
#png("FEPvsControls_v5_all_p_gsk3size2.png", units="in", width=8, height=5, res=1800)
pp3 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
axis.title=element_text(size=15)) + labs(size="pGSK3 Z-value", colour="Phenotype") + scale_size(range = c(2,8))
#dev.off()
#png("FEPvsControls_v5_all_p_aktsize2.png", units="in", width=8, height=5, res=1800)
pp4 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
axis.title=element_text(size=15)) + labs(size="pAkt Z-value", colour="Phenotype") + scale_size(range = c(2,9))
#dev.off()
#png("FEPvsControls_v5_all_p_v2.png", units="in", width=8, height=5, res=1800)
#pp2 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=10), legend.title=element_text(size=15),
# axis.title=element_text(size=15))
#dev.off()
3D
#3D
plot3d(
x=fepdata$pAkt.value_z, y=fepdata$pS6.value_z, z=fepdata$pGSK3.value_z,
col = fepdata$colour,
type = 's',
radius = 0.05,
xlab="pAkt", ylab="pS6", zlab="GSK3")
#writeWebGL(filename="3dscatter_all.html" , width=600, height=600)
Open The attached 3dscatter_all.html to view the 3D plot.
fepdata_temp = (subset(fepdata,Phenotype==0))
sp3b<-ggplot(fepdata_temp, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z)) + geom_point(size=4, shape=19) +
ggtitle("Controls") +
scale_color_gradient(low="white", high="#0000cc",limits=c(gsk_min-0.5,gsk_max)) +
xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
sp3b
sp3b2<-ggplot(fepdata_temp, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z)) + geom_point(size=4, shape=19) +
ggtitle("Controls") +
scale_color_gradient(low="white", high="#0000cc",limits=c(akt_min-0.5,akt_max)) +
xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
sp3b2
par(mar=c(0,0,0,0))
plot3d(
x=fepdata_temp$pAkt.value_z, y=fepdata_temp$pS6.value_z, z=fepdata_temp$pGSK3.value_z,
col = fepdata_temp$colour,
type = 's',
radius = 0.05,
xlab="pAkt", ylab="pS6", zlab="GSK3")
#writeWebGL( filename="3dscatter_controls.html" , width=600, height=600)
Open The attached 3dscatter_controls.html to view the 3D plot
fepdata_temp = (subset(fepdata,Phenotype==1))
sp4a<-ggplot(fepdata_temp, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z)) + geom_point(size=4, shape=19) +
ggtitle("Cases") +
scale_color_gradient(low="white", high="#cc0000",limits=c(gsk_min-0.5,gsk_max)) +
xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
sp4a
sp4a2<-ggplot(fepdata_temp, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z)) + geom_point(size=4, shape=19) +
ggtitle("Cases") +
scale_color_gradient(low="white", high="#cc0000",limits=c(akt_min-0.5,akt_max)) +
xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
theme_bw()
sp4a2
par(mar=c(0,0,0,0))
plot3d(
x=fepdata_temp$pAkt.value, y=fepdata_temp$pS6.value, z=fepdata_temp$pGSK3.value,
col = fepdata_temp$colour,
type = 's',
radius = 0.05,
xlab="pAkt", ylab="pS6", zlab="GSK3")
writeWebGL( filename="3dscatter_cases.html" , width=600, height=600)
Open The attached 3dscatter_cases.html to view the 3D plot
grid.arrange(sp3b,sp4a,ncol=2)
print_p_cor <- function(x,y,digits=4,prefix=""){
cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
r = abs(cortest$estimate)
p = cortest$p.value
txt1 <- format(c(r, 0.123456789), digits = digits)[1]
txt2 <- format(c(p, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
return(txt)
}
fepdata_cont = (subset(fepdata,Phenotype==0))
fepdata_case = (subset(fepdata,Phenotype==1))
cont_plot<-ggplot(fepdata_cont, aes(x=pAkt.value_z, y=pGSK3.value_z)) + geom_point(size=2, colour="#253582ff") +
annotate("text", x=2.4, y=-2, size=3, label=paste0(print_p_cor(fepdata_cont$pAkt.value,fepdata_cont$pGSK3.value)," ns")) +
xlab("pAkt") + ylab("pGSK3") + ggtitle("Controls") + stat_smooth(method=lm, colour="#253582ff") + theme_classic()
case_plot<-ggplot(fepdata_case, aes(x=pAkt.value_z, y=pGSK3.value_z)) + geom_point(size=2, colour="#de7065ff") +
annotate("text", x=1.2, y=-2, size=3, label=paste0(print_p_cor(fepdata_case$pAkt.value,fepdata_case$pGSK3.value)," ns")) +
xlab("pAkt") + ylab("pGSK3") + ggtitle("Cases") + stat_smooth(method=lm, colour="#de7065ff") + theme_classic()
grid.arrange(cont_plot, case_plot, ncol=2)
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 viridis_0.6.1 viridisLite_0.4.0 broom_0.7.7
## [5] ggpubr_0.4.0 ggnewscale_0.4.5 ggplot2_3.3.4 car_3.0-10
## [9] carData_3.0-4 readr_1.4.0 dplyr_1.0.7 tidyr_1.1.3
## [13] rglwidget_0.2.1 rgl_0.106.8 knitr_1.33
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.0 jsonlite_1.7.2 splines_4.1.0
## [4] bslib_0.2.5.1 shiny_1.6.0 highr_0.9
## [7] cellranger_1.1.0 yaml_2.2.1 lattice_0.20-44
## [10] pillar_1.6.1 backports_1.2.1 glue_1.4.2
## [13] digest_0.6.27 manipulateWidget_0.11.0 promises_1.2.0.1
## [16] ggsignif_0.6.2 colorspace_2.0-1 Matrix_1.3-3
## [19] htmltools_0.5.1.1 httpuv_1.6.1 pkgconfig_2.0.3
## [22] haven_2.4.1 purrr_0.3.4 xtable_1.8-4
## [25] scales_1.1.1 openxlsx_4.2.4 later_1.2.0
## [28] rio_0.5.27 tibble_3.1.2 mgcv_1.8-35
## [31] generics_0.1.0 farver_2.1.0 ellipsis_0.3.2
## [34] withr_2.4.2 magrittr_2.0.1 crayon_1.4.1
## [37] readxl_1.3.1 mime_0.11 evaluate_0.14
## [40] fansi_0.5.0 nlme_3.1-152 rstatix_0.7.0
## [43] forcats_0.5.1 foreign_0.8-81 tools_4.1.0
## [46] data.table_1.14.0 hms_1.1.0 lifecycle_1.0.0
## [49] stringr_1.4.0 munsell_0.5.0 zip_2.2.0
## [52] compiler_4.1.0 jquerylib_0.1.4 rlang_0.4.11
## [55] grid_4.1.0 htmlwidgets_1.5.3 crosstalk_1.1.1
## [58] miniUI_0.1.1.1 labeling_0.4.2 rmarkdown_2.9
## [61] gtable_0.3.0 abind_1.4-5 curl_4.3.2
## [64] R6_2.5.0 fastmap_1.1.0 utf8_1.2.1
## [67] stringi_1.6.2 Rcpp_1.0.6 vctrs_0.3.8
## [70] tidyselect_1.1.1 xfun_0.24